// gorgonia.org/gorgonia@v0.9.17/cuda modules/src/elemunaryop.cu
//
// Element-wise, in-place unary kernels for double (f64) and float (f32)
// buffers. Every kernel has the signature `(T* A, int size)` and overwrites
// A[i] with f(A[i]) for each i in [0, size).
//
// Launch layout: each thread handles at most one element. The flat element
// index is derived from all three grid dimensions and all three block
// dimensions, so any 1-D, 2-D or 3-D launch works as long as the total number
// of threads is at least `size`. Surplus threads exit without touching memory.
// No shared memory is used.

#define _USE_MATH_DEFINES
#include <math.h>

// THREADID declares `blockId` (flat block index) and `idx` (flat global thread
// index) in the enclosing scope. The arithmetic is done in 64 bits so that a
// launch with a very large total thread count cannot wrap around to a negative
// or aliased index, which would slip past CHECKSIZE.
#define THREADID \
    const long long blockId = (long long)blockIdx.x \
        + (long long)blockIdx.y * gridDim.x \
        + (long long)blockIdx.z * gridDim.x * gridDim.y; \
    const long long idx = blockId * ((long long)blockDim.x * blockDim.y * blockDim.z) \
        + (long long)threadIdx.z * (blockDim.x * blockDim.y) \
        + (long long)threadIdx.y * blockDim.x \
        + threadIdx.x;

// CHECKSIZE makes threads whose index lies at or past `size` return early.
// It expects `idx` (from THREADID) and the kernel parameter `size` in scope.
#define CHECKSIZE \
    if (idx >= size) { \
        return; \
    }

// UNARYOP(name, t, type, op) defines the kernel `name_t(type* A, int size)`
// which replaces every in-range element with op(element).
#define UNARYOP(name, t, type, op) \
    __global__ void name ##_##t (type* A, int size) { \
        THREADID \
        CHECKSIZE \
        A[idx] = op(A[idx]); \
    }

extern "C" {

/* FLOAT64 */

UNARYOP(cos, f64, double, cos)
UNARYOP(sin, f64, double, sin)
UNARYOP(exp, f64, double, exp)
UNARYOP(ln, f64, double, log)
UNARYOP(log2, f64, double, log2)
UNARYOP(sqrt, f64, double, sqrt)
UNARYOP(tanh, f64, double, tanh)
UNARYOP(log1p, f64, double, log1p)
UNARYOP(expm1, f64, double, expm1)

// un-differentiable
// fabs (not abs) so the double-precision routine is named explicitly instead
// of being picked by overload resolution.
UNARYOP(abs, f64, double, fabs)
UNARYOP(ceil, f64, double, ceil)
UNARYOP(floor, f64, double, floor)

// sign_f64 writes 1 for positive inputs, -1 for negative inputs and 0
// otherwise (both comparisons are false for zero and for NaN).
__global__ void sign_f64(double* A, int size) {
    THREADID
    CHECKSIZE
    const double x = A[idx];
    A[idx] = (x > 0.0) - (x < 0.0);
}

// square_f64 replaces each element with its square.
__global__ void square_f64(double* A, int size) {
    THREADID
    CHECKSIZE
    const double x = A[idx];
    A[idx] = x * x;
}

// cube_f64 replaces each element with its third power.
__global__ void cube_f64(double* A, int size) {
    THREADID
    CHECKSIZE
    const double x = A[idx];
    A[idx] = x * x * x;
}

// neg_f64 negates each element.
__global__ void neg_f64(double* A, int size) {
    THREADID
    CHECKSIZE
    A[idx] = -A[idx];
}

// inverse_f64 replaces each element with its reciprocal. There is no special
// handling of zero; the result is whatever IEEE division yields.
__global__ void inverse_f64(double* A, int size) {
    THREADID
    CHECKSIZE
    A[idx] = 1.0 / A[idx];
}

// softplus_f64 computes log(1 + e^x) in place with two clamped tails:
// inputs below -708 become 0, inputs above 16 are left unchanged.
// NOTE(review): the cut-offs are kept exactly as found; presumably they mirror
// a host-side implementation - confirm before tuning them.
__global__ void softplus_f64(double* A, int size) {
    THREADID
    CHECKSIZE
    const double x = A[idx];
    if (x < -708.0) {
        A[idx] = 0.0;
    } else if (x > 16.0) {
        // no op: the stored value is already the result
    } else {
        A[idx] = log1p(exp(x));
    }
}

// sigmoid_f64 computes 1 / (1 + e^-x) in place with two clamped tails:
// inputs below -709 become 0, inputs above 19 become 1.
__global__ void sigmoid_f64(double* A, int size) {
    THREADID
    CHECKSIZE
    const double x = A[idx];
    if (x < -709.0) {
        A[idx] = 0.0;
    } else if (x > 19.0) {
        A[idx] = 1.0;
    } else {
        A[idx] = 1.0 / (1.0 + exp(-x));
    }
    // alternative sigmoid function:
    // A[idx] = 1 / (1 + pow(M_E, (double)(-1 * A[idx])));
}

/* FLOAT32 */

UNARYOP(cos, f32, float, cosf)
UNARYOP(sin, f32, float, sinf)
UNARYOP(exp, f32, float, expf)
UNARYOP(ln, f32, float, logf)
UNARYOP(log2, f32, float, log2f)
UNARYOP(sqrt, f32, float, sqrtf)
UNARYOP(tanh, f32, float, tanhf)
UNARYOP(log1p, f32, float, log1pf)
UNARYOP(expm1, f32, float, expm1f)

// un-differentiable
// fabsf keeps this entry consistent with the other single-precision routines
// in this list (cosf, ceilf, floorf, ...).
UNARYOP(abs, f32, float, fabsf)
UNARYOP(ceil, f32, float, ceilf)
UNARYOP(floor, f32, float, floorf)

// sign_f32 writes 1 for positive inputs, -1 for negative inputs and 0
// otherwise (both comparisons are false for zero and for NaN).
__global__ void sign_f32(float* A, int size) {
    THREADID
    CHECKSIZE
    const float x = A[idx];
    A[idx] = (x > 0.0f) - (x < 0.0f);
}

// square_f32 replaces each element with its square.
__global__ void square_f32(float* A, int size) {
    THREADID
    CHECKSIZE
    const float x = A[idx];
    A[idx] = x * x;
}

// cube_f32 replaces each element with its third power.
__global__ void cube_f32(float* A, int size) {
    THREADID
    CHECKSIZE
    const float x = A[idx];
    A[idx] = x * x * x;
}

// neg_f32 negates each element.
__global__ void neg_f32(float* A, int size) {
    THREADID
    CHECKSIZE
    A[idx] = -A[idx];
}

// inverse_f32 replaces each element with its reciprocal. There is no special
// handling of zero; the result is whatever IEEE division yields.
__global__ void inverse_f32(float* A, int size) {
    THREADID
    CHECKSIZE
    A[idx] = 1.0f / A[idx];
}

// softplus_f32 computes log(1 + e^x) in place with two clamped tails:
// inputs below -103 become 0, inputs above 14 are left unchanged.
// NOTE(review): the cut-offs are kept exactly as found; presumably they mirror
// a host-side implementation - confirm before tuning them.
__global__ void softplus_f32(float* A, int size) {
    THREADID
    CHECKSIZE
    const float x = A[idx];
    if (x < -103.0f) {
        A[idx] = 0.0f;
    } else if (x > 14.0f) {
        // no op: the stored value is already the result
    } else {
        A[idx] = log1pf(expf(x));
    }
}

// sigmoid_f32 computes 1 / (1 + e^-x) in place with two clamped tails:
// inputs below -88 become 0, inputs above 15 become 1.
__global__ void sigmoid_f32(float* A, int size) {
    THREADID
    CHECKSIZE
    const float x = A[idx];
    if (x < -88.0f) {
        A[idx] = 0.0f;
    } else if (x > 15.0f) {
        A[idx] = 1.0f;
    } else {
        A[idx] = 1.0f / (1.0f + expf(-x));
    }
    // alternative sigmoid function:
    // A[idx] = 1 / (1 + powf((float)(M_E), (-1 * A[idx])));
}

} // extern "C"